{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fetch Minerva.jsonl from https://github.com/hkust-nlp/simpleRL-reason/blob/main/eval/data/minerva_math/test.jsonl\n",
    "import json\n",
    "\n",
    "# Load jsonl file and convert to json\n",
    "minerva_data = []\n",
    "with open('../test/minerva.json', 'r') as f:\n",
    "    for line in f:\n",
    "        minerva_data = json.loads(line)\n",
    "\n",
    "len(minerva_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Have gemini add the solutions to the numina dataset.\n",
    "# Filter for non-proof problems.\n",
    "import concurrent.futures\n",
    "from concurrent.futures import as_completed\n",
    "from deepscaler.utils import call_gemini_llm\n",
    "from deepscaler.system_prompts import EXTRACT_SOLUTION_PROMPT\n",
    "\n",
    "def get_answer(entry):\n",
    "    # 1) Get the problem text\n",
    "    problem_text = entry['problem']\n",
    "    solution_text = entry['solution']\n",
    "    # 2) Call Gemini LLM\n",
    "    output_list = call_gemini_llm(f'Problem: {problem_text} \\n----\\n Solution: {solution_text}', system_prompt=EXTRACT_SOLUTION_PROMPT, n=3)\n",
    "    output_list = [o for o in output_list if \n",
    "                   'error' not in o and 'Error' not in o and 'Solution not found' not in o]\n",
    "    if not output_list:\n",
    "        print(problem_text)\n",
    "        print(solution_text)\n",
    "        return None\n",
    "\n",
    "    for output_str in output_list:\n",
    "        if 'answer' in entry:\n",
    "            if output_str in entry['answer']:\n",
    "                continue\n",
    "            entry['answer'].append(output_str)\n",
    "        else:\n",
    "            entry['answer'] = [output_str]\n",
    "    return entry\n",
    "\n",
    "results = []\n",
    "idx = 0\n",
    "with concurrent.futures.ThreadPoolExecutor(max_workers=32) as executor:\n",
    "    # 1) Submit all jobs to the executor\n",
    "    futures = [executor.submit(get_answer, entry) for entry in minerva_data]\n",
    "    # 2) Process them as they complete, using tqdm for a progress bar\n",
    "    for future in as_completed(futures):\n",
    "        # Get the result for each completed future\n",
    "        result = future.result()\n",
    "        if result:\n",
    "            results.append(result)\n",
    "        if idx%1000 == 0:\n",
    "            print(idx)\n",
    "            with open(\"olympiad_checkpoint.json\", \"w\") as f:\n",
    "                json.dump(results, f, indent=2)\n",
    "        idx += 1\n",
    "\n",
    "# Save final list as json\n",
    "with open(\"minerva.json\", \"w\") as f:\n",
    "    json.dump(results, f, indent=2)\n",
    "    "
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "deepscaler",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
